Reading the data¶
Read the pickle file created previously:
[1]:
import pickle
file_path = './filtered.pkl'
with open(file_path, 'rb') as f:
curves = pickle.load(f)
Setting up the plotting library and plotting themes:
[2]:
from ipywidgets import interact
from bokeh.palettes import Magma
from bokeh.layouts import column
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook, push_notebook
output_notebook()
color_theme = Magma[10]
def_plt_size = [600, 400]
Just plot an example time series to get a notion on the data gotten from the pickle (.pkl) file generated from the previous script:
[3]:
from datetime import datetime
from datetime import timedelta
index = 5
p = figure(x_axis_type="datetime", plot_width=def_plt_size[0], plot_height=def_plt_size[1], title="Light Curve Example")
p.grid.grid_line_alpha = 0
p.yaxis.axis_label = 'Intensity'
p.xaxis.axis_label = 'Time (mm/dd/yy)'
p.xgrid.band_fill_color = color_theme[0]
p.xgrid.band_fill_alpha = 0.05
time_in_days = [curves['i'][index] + timedelta(minutes=time) for time in curves['t'][index]]
p.line(time_in_days, curves['r'][index], legend='Raw Light Curve', line_width=2, color=color_theme[2], muted_alpha=0.2, line_cap='round')
p.line(time_in_days, curves['y'][index], legend='Filtered Light Curve', line_width=2, color=color_theme[5], muted_alpha=0.2, line_cap='round')
p.legend.location = "top_left"
p.legend.click_policy = "mute"
show(p)
BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead
BokehDeprecationWarning: 'legend' keyword is deprecated, use explicit 'legend_label', 'legend_field', or 'legend_group' keywords instead
Get the sizes of each time series:
[5]:
sizes = [len(curve) for curve in curves['y']]
x_values = [k+1 for k in range(len(sizes))]
sizes.sort(reverse=True)
p = figure(plot_width=def_plt_size[0], plot_height=def_plt_size[1], title="Light Curves - Dimensions")
p.grid.grid_line_alpha = 0
p.xaxis.axis_label = 'Light Curve'
p.yaxis.axis_label = 'Size'
p.ygrid.band_fill_color = color_theme[0]
p.ygrid.band_fill_alpha = 0.05
p.line(x_values, sizes, line_width=3, color=color_theme[2], line_cap='round')
show(p) # open a browser
Compute some important constants that will be used during the analysis:
[6]:
sample_time = curves['t'][0][1] - curves['t'][0][0] # minutes
sample_freq = 1 / (60 * sample_time) # hertz
print("The series have a time sample of {} minutes, consequently a sample frequency of {} Hz".format(round(sample_time,2), round(sample_freq,6)))
The series have a time sample of 8.52 minutes, consequently a sample frequency of 0.001955 Hz
Building Time Series Frequency Analysis¶
Select one curve to create the frequency analysis, let’s say the curve indexed as index = 5. We use that curve as an example to create the routine to create time-series features for feature machine learning algorithms.
For that mather, we first present the curve and the respective frequency spectrum of the light curve with index = 5:
[8]:
index = 5
p = figure(x_axis_type="datetime", plot_width=def_plt_size[0], plot_height=def_plt_size[1], title="Light Curve Example")
p.grid.grid_line_alpha = 0
p.yaxis.axis_label = 'Intensity'
p.xaxis.axis_label = 'Time (mm/dd/yy)'
p.xgrid.band_fill_color = color_theme[0]
p.xgrid.band_fill_alpha = 0.05
time_in_days = [curves['i'][index] + timedelta(minutes=time) for time in curves['t'][index]]
p.line(time_in_days, curves['r'][index], legend_label='Raw Light Curve', line_width=2, color=color_theme[2], muted_alpha=0.2, line_cap='round')
p.line(time_in_days, curves['y'][index], legend_label='Filtered Light Curve', line_width=2, color=color_theme[5], muted_alpha=0.2, line_cap='round')
p.legend.location = "bottom_right"
p.legend.click_policy = "mute"
show(p)
[9]:
import scipy.signal as ssg
freq, spectra = ssg.periodogram(curves['r'][index], fs=sample_freq, scaling='density')
ffreq, fspectra = ssg.periodogram(curves['y'][index], fs=sample_freq, scaling='density')
p = figure(x_axis_type="log", plot_width=def_plt_size[0], plot_height=def_plt_size[1], title="Light Curve Frequency Spectrum")
p.grid.grid_line_alpha = 0
p.yaxis.axis_label = 'Magnitude'
p.xaxis.axis_label = 'Frequency (Hz)'
p.xgrid.band_fill_color = color_theme[0]
p.xgrid.band_fill_alpha = 0.05
p.line(freq, spectra, legend_label='Raw LC Spectrum', line_width=3, color=color_theme[2], muted_alpha=0.2, line_cap='round')
p.line(ffreq, fspectra, legend_label='Filtered LC Spectrum', line_width=3, color=color_theme[5], muted_alpha=0.2, line_cap='round')
p.legend.location = "top_right"
p.legend.click_policy = "mute"
show(p)
First signal preprocessing algorithm used is a detrend to remove the \(f = 0\) Hz frequency component from signal.
[10]:
import scipy.signal as ssg
detrended_data = ssg.detrend(curves['r'][index], type='linear')
# Regular Series
p = figure(x_axis_type="datetime", plot_width=def_plt_size[0], plot_height=def_plt_size[1], title="Light Curve Raw")
p.grid.grid_line_alpha = 0
p.yaxis.axis_label = 'Intensity'
p.xaxis.axis_label = 'Time (dd/mm/yy)'
p.xgrid.band_fill_color = color_theme[0]
p.xgrid.band_fill_alpha = 0.05
time_in_days = [curves['i'][index] + timedelta(minutes=time) for time in curves['t'][index]]
p.line(time_in_days, curves['r'][index], legend_label='Raw Light Curve', line_width=2, color=color_theme[2], muted_alpha=0.2, line_cap='round')
p.legend.location = "bottom_right"
p.legend.click_policy = "mute"
# Detrended Series
p1 = figure(x_axis_type="datetime", plot_width=def_plt_size[0], plot_height=def_plt_size[1], title="Light Curve Detrended")
p1.grid.grid_line_alpha = 0
p1.yaxis.axis_label = 'Intensity'
p1.xaxis.axis_label = 'Time (dd/mm/yy)'
p1.xgrid.band_fill_color = color_theme[0]
p1.xgrid.band_fill_alpha = 0.05
p1.line(time_in_days, detrended_data, legend_label='Detrended Light Curve', line_width=2, color=color_theme[7], muted_alpha=0.2, line_cap='round')
p1.legend.location = "bottom_right"
p1.legend.click_policy = "mute"
show(column(p, p1))
[11]:
freq, spectra = ssg.periodogram(curves['r'][index], fs=sample_freq, scaling='spectrum')
ffreq, fspectra = ssg.periodogram(detrended_data, fs=sample_freq, scaling='spectrum')
p = figure(x_axis_type="log", plot_width=def_plt_size[0], plot_height=def_plt_size[1], title="Light Curve Frequency Spectrum")
p.grid.grid_line_alpha = 0
p.yaxis.axis_label = 'Magnitude'
p.xaxis.axis_label = 'Frequency (Hz)'
p.xgrid.band_fill_color = color_theme[0]
p.xgrid.band_fill_alpha = 0.05
p.line(freq, spectra, legend_label='Raw LC Spectrum', line_width=3, color=color_theme[2], muted_alpha=0.2, line_cap='round')
p.line(ffreq, fspectra, legend_label='Detrended LC Spectrum', line_width=3, color=color_theme[5], muted_alpha=0.2, line_cap='round')
p.legend.location = "top_right"
p.legend.click_policy = "mute"
show(p)
Interesting enough, this is the spectrum of all the time series analysed! Of course we will detect several highly evidenced frequency components, because the time series is clearly periodic… therefore, how this spectrum varies, as the time goes on? On the next section we will present a way to show this behavior!
Creating curves spectrograms¶
The main idea behind spetrograms is to see how the time-series spectrum varies through time. And interesting enough, with spectrograms, this information is represented as an image! Therefore our information that once had only one dimension, now has two dimensions!
Firstly let’s try to see this pattern in a one dimension manner… for that we will need to create a video of the spectrum as time goes by. For that we first need to compute the spectrogram of the time-series.
To create the spectrograms, it is first necessary to think and set several parameters:
[12]:
import numpy as np
from scipy import signal
index = 5
nperseg = 550
f, t, Sxx = signal.spectrogram(
curves['r'][index],
fs=sample_freq,
noverlap=nperseg,
nperseg=nperseg+1,
detrend='linear')
Sxx = 20 * np.log10(Sxx)
Here we computed the Sxx which is the Spectrogram amplitude on the frequencies f for each time moment in t… let’s make a video on how this spectrum varies:
[16]:
p = figure(plot_height=def_plt_size[1], plot_width=def_plt_size[0],
title="Light Curve",
tools="crosshair,pan,reset,save,wheel_zoom")
l = p.line(t, curves['r'][index][nperseg:], line_width=3, color=color_theme[3], line_cap='round')
source = ColumnDataSource(data=dict(x=[t[0]], y=[curves['r'][index][nperseg]]))
c = p.circle('x', 'y', source=source, size=12, color=color_theme[7], alpha=0.3)
p1 = figure(plot_height=def_plt_size[1], plot_width=def_plt_size[0], x_axis_type="log",
title="Light Curve Frequency Variation",
y_range=[.1*Sxx.min(), Sxx.max()])
y = Sxx[:,0]
r = p1.line(f, y, line_width=3, line_alpha=0.8, color=color_theme[5], line_cap='round')
def update_data(tindex=0):
# Generate the new curve
r.data_source.data['y'] = Sxx[:,tindex]
# Generate the reference point
source.data = dict(x=[t[tindex]], y=[curves['r'][index][nperseg + tindex]])
push_notebook()
show(column(p, p1), notebook_handle=True)
[16]:
<Bokeh Notebook handle for In[16]>
[17]:
interact(update_data, tindex=(0, len(t), 5))
[17]:
<function __main__.update_data(tindex=0)>
One more simple way of providing this same inforamtion without this much ‘sparkles’… would be to generate the spectrogram image of the time series. This can be done by a heat map of the Sxx along f and t. For that one may use the following:
[19]:
p = figure( plot_height=def_plt_size[0],
plot_width=def_plt_size[0],
title="Light Curve Spectrogram",
y_axis_type="log",
tooltips=[("x", "$x"), ("y", "$y"), ("value", "@image")])
p.x_range.range_padding = p.y_range.range_padding = 0
p.yaxis.axis_label = 'Frequency (Hz)'
p.xaxis.axis_label = 'Time (s)'
# must give a vector of image data for image parameter
p.image(image=[Sxx[1:,:]], x=t[0], y=f[1], dw=t[-1], dh=f[-1], palette="Viridis256")
show(p) # open a browser